package com.micai.boss.utils.images;

import net.sourceforge.tess4j.Tesseract;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

/**
 * Command-line demo that recognizes the text contained in an image using Tesseract
 * (through the Tess4J wrapper) and prints the result to standard output.
 *
 * <p>Usage: {@code ImageReadText [imagePath [tessdataDir [language]]]}. Every argument is
 * optional; an omitted (or empty) argument falls back to the corresponding default constant
 * declared in this class.
 *
 * <p>Reference articles:
 * https://blog.csdn.net/qq_38653981/article/details/130183351
 * https://blog.csdn.net/juzicode00/article/details/121343486
 *
 * @author zhaoxinguo
 * @version 1.0.0
 * @project micai-boss
 * @description Recognizes the text in an image.
 * @date 2024/3/9 10:08:47
 */
public class ImageReadText {

    /** Default directory holding the Tesseract language data, passed to {@code setDatapath}. */
    private static final String DEFAULT_DATA_PATH =
            "E:\\BaiduNetdiskDownload\\个人资料\\绘本故事\\tessdata";

    /** Default Tesseract language code, passed to {@code setLanguage}. */
    private static final String DEFAULT_LANGUAGE = "chi_sim";

    /** Default image that is OCR'd when no image path is supplied. */
    private static final String DEFAULT_IMAGE_PATH =
            "E:\\BaiduNetdiskDownload\\个人资料\\绘本故事\\temp\\卢森堡公园的一天_4.png";

    /**
     * Runs OCR on one image and prints the recognized text.
     *
     * @param args optional overrides: {@code args[0]} image path, {@code args[1]} tessdata
     *             directory, {@code args[2]} language code
     * @throws IOException if the image file cannot be read, or if its content is not in a
     *                     format that {@link ImageIO} is able to decode
     */
    public static void main(String[] args) throws IOException {
        String imagePath = argOrDefault(args, 0, DEFAULT_IMAGE_PATH);
        String dataPath = argOrDefault(args, 1, DEFAULT_DATA_PATH);
        String language = argOrDefault(args, 2, DEFAULT_LANGUAGE);

        // Load the image first so that a bad input fails before the OCR engine is configured.
        BufferedImage image = ImageIO.read(new File(imagePath));
        if (image == null) {
            // ImageIO.read signals "no registered reader can decode this file" by returning
            // null rather than throwing; report it explicitly instead of handing null to OCR.
            throw new IOException("Unsupported or corrupt image file: " + imagePath);
        }

        Tesseract tesseract = new Tesseract();
        // Location of the language data directory.
        tesseract.setDatapath(dataPath);
        // Language used for recognition.
        tesseract.setLanguage(language);

        try {
            // NOTE(review): the original author noted that a PDF could be processed through
            // the File overload instead, e.g. doOCR(new File("xxx.pdf")) - confirm before use.
            String result = tesseract.doOCR(image);
            System.out.println(result);
        } catch (Exception e) {
            // main is the program boundary: report which input failed, then the full cause.
            System.err.println("OCR failed for image: " + imagePath);
            e.printStackTrace();
        }
    }

    /** Returns {@code args[index]} when it is present and non-empty, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args == null || index >= args.length) {
            return fallback;
        }
        String value = args[index];
        return (value == null || value.isEmpty()) ? fallback : value;
    }

}
